This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter (in Windows press CTRL+Enter).
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I (or CTRL+Alt+I in Windows).
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
Here i am uploading my packages (code omitted).
names(wage1)
## [1] "wage" "educ" "exper" "tenure"
## [5] "nonwhite" "female" "married" "numdep"
## [9] "smsa" "northcen" "south" "west"
## [13] "construc" "ndurman" "trcommpu" "trade"
## [17] "services" "profserv" "profocc" "clerocc"
## [21] "servocc" "lwage" "expersq" "tenursq"
## [25] "female_married" "exper_tenure"
head(wage1) # first 6 observations
str(wage1) # structure of dataset
## 'data.frame': 526 obs. of 26 variables:
## $ wage : num 3.1 3.24 3 6 5.3 ...
## ..- attr(*, "label")= chr "log(wage)"
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ educ : num 11 12 11 8 12 16 18 12 12 17 ...
## ..- attr(*, "label")= chr "years of education"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ exper : num 2 22 2 44 7 9 15 5 26 22 ...
## ..- attr(*, "label")= chr "years potential experience"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ tenure : num 0 2 0 28 2 8 7 3 4 21 ...
## ..- attr(*, "label")= chr "years with current employer"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ nonwhite : num 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if nonwhite"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ female : num 1 1 0 0 0 0 0 1 1 0 ...
## ..- attr(*, "label")= chr "=1 if female"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ married : num 0 1 0 1 1 1 0 0 0 1 ...
## ..- attr(*, "label")= chr "=1 if married"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ numdep : num 2 3 2 0 1 0 0 0 2 0 ...
## ..- attr(*, "label")= chr "number of dependents"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ smsa : num 1 1 0 1 0 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "=1 if live in SMSA"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ northcen : num 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if live in north central U.S"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ south : num 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if live in southern region"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ west : num 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "=1 if live in western region"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ construc : num 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if work in construc. indus."
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ ndurman : num 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if in nondur. manuf. indus."
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ trcommpu : num 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if in trans, commun, pub ut"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ trade : num 0 0 1 0 0 0 1 0 1 0 ...
## ..- attr(*, "label")= chr "=1 if in wholesale or retail"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ services : num 0 1 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if in services indus."
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ profserv : num 0 0 0 0 0 1 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if in prof. serv. indus."
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ profocc : num 0 0 0 0 0 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "=1 if in profess. occupation"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ clerocc : num 0 0 0 1 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if in clerical occupation"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ servocc : num 0 1 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if in service occupation"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ lwage : num 1.13 1.18 1.1 1.79 1.67 ...
## ..- attr(*, "label")= chr "log(wage)"
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ expersq : num 4 484 4 1936 49 ...
## ..- attr(*, "label")= chr "exper^2"
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ tenursq : num 0 4 0 784 4 64 49 9 16 441 ...
## ..- attr(*, "label")= chr "tenure^2"
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ female_married: num 0 1 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "=1 if female"
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ exper_tenure : num 0 44 0 1232 14 ...
## ..- attr(*, "label")= chr "years potential experience"
## ..- attr(*, "format.stata")= chr "%8.0g"
ExpData(wage1,type=1)
ExpData(wage1,type=2)
dplyr::glimpse(wage1)
## Rows: 526
## Columns: 26
## $ wage <dbl> 3.100000, 3.240000, 3.000000, 6.000000, 5.300000, 8.750…
## $ educ <dbl> 11, 12, 11, 8, 12, 16, 18, 12, 12, 17, 16, 13, 12, 12, …
## $ exper <dbl> 2, 22, 2, 44, 7, 9, 15, 5, 26, 22, 8, 3, 15, 18, 31, 14…
## $ tenure <dbl> 0, 2, 0, 28, 2, 8, 7, 3, 4, 21, 2, 0, 0, 3, 15, 0, 0, 1…
## $ nonwhite <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ female <dbl> 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1…
## $ married <dbl> 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1…
## $ numdep <dbl> 2, 3, 2, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 1, 1, 0, 0, 3…
## $ smsa <dbl> 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ northcen <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ south <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ west <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ construc <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ndurman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ trcommpu <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ trade <dbl> 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ services <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ profserv <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0…
## $ profocc <dbl> 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0…
## $ clerocc <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0…
## $ servocc <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ lwage <dbl> 1.1314021, 1.1755733, 1.0986123, 1.7917595, 1.6677068, …
## $ expersq <dbl> 4, 484, 4, 1936, 49, 81, 225, 25, 676, 484, 64, 9, 225,…
## $ tenursq <dbl> 0, 4, 0, 784, 4, 64, 49, 9, 16, 441, 4, 0, 0, 9, 225, 0…
## $ female_married <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1…
## $ exper_tenure <dbl> 0, 44, 0, 1232, 14, 72, 105, 15, 104, 462, 16, 0, 0, 54…
dplyr::glimpse(wage1$lwage)
## num [1:526] 1.13 1.18 1.1 1.79 1.67 ...
## - attr(*, "label")= chr "log(wage)"
## - attr(*, "format.stata")= chr "%9.0g"
mean(wage1$educ)
## [1] 12.56274
median(wage1$educ)
## [1] 12
min(wage1$educ)
## [1] 0
max(wage1$educ)
## [1] 18
range(wage1$educ)
## [1] 0 18
quantile(wage1$educ)
## 0% 25% 50% 75% 100%
## 0 12 12 14 18
quantile(wage1$educ,0.8)
## 80%
## 15
IQR(wage1$educ)
## [1] 2
sd(wage1$educ)
## [1] 2.769022
var(wage1$educ)
## [1] 7.667485
lapply(wage1[,1:3],sd)
## $wage
## [1] 3.693086
##
## $educ
## [1] 2.769022
##
## $exper
## [1] 13.57216
summary(wage1$educ)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 12.00 12.00 12.56 14.00 18.00
by(wage1,wage1$female,summary)
## wage1$female: 0
## wage educ exper tenure
## Min. : 1.500 Min. : 2.00 Min. : 1.00 Min. : 0.000
## 1st Qu.: 4.143 1st Qu.:12.00 1st Qu.: 6.00 1st Qu.: 0.000
## Median : 6.000 Median :12.00 Median :14.00 Median : 3.000
## Mean : 7.099 Mean :12.79 Mean :17.56 Mean : 6.474
## 3rd Qu.: 8.765 3rd Qu.:15.00 3rd Qu.:28.00 3rd Qu.: 9.000
## Max. :24.980 Max. :18.00 Max. :51.00 Max. :44.000
## nonwhite female married numdep smsa
## Min. :0.0000 Min. :0 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0 Median :1.0000 Median :0.000 Median :1.0000
## Mean :0.1058 Mean :0 Mean :0.6861 Mean :1.004 Mean :0.7153
## 3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :0 Max. :1.0000 Max. :6.000 Max. :1.0000
## northcen south west construc
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.2445 Mean :0.3759 Mean :0.1496 Mean :0.06204
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## ndurman trcommpu trade services
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.1423 Mean :0.04745 Mean :0.3102 Mean :0.06934
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## profserv profocc clerocc servocc
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1679 Mean :0.4489 Mean :0.04015 Mean :0.08759
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## lwage expersq tenursq female_married
## Min. :0.4055 Min. : 1.0 Min. : 0.0 Min. :0
## 1st Qu.:1.4212 1st Qu.: 36.0 1st Qu.: 0.0 1st Qu.:0
## Median :1.7918 Median : 196.0 Median : 9.0 Median :0
## Mean :1.8136 Mean : 489.9 Mean : 111.7 Mean :0
## 3rd Qu.:2.1708 3rd Qu.: 784.0 3rd Qu.: 81.0 3rd Qu.:0
## Max. :3.2181 Max. :2601.0 Max. :1936.0 Max. :0
## exper_tenure
## Min. : 0.0
## 1st Qu.: 0.0
## Median : 40.0
## Mean : 176.8
## 3rd Qu.: 182.0
## Max. :2068.0
## ------------------------------------------------------------
## wage1$female: 1
## wage educ exper tenure
## Min. : 0.530 Min. : 0.00 Min. : 1.00 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.:12.00 1st Qu.: 5.00 1st Qu.: 0.000
## Median : 3.750 Median :12.00 Median :13.00 Median : 2.000
## Mean : 4.588 Mean :12.32 Mean :16.43 Mean : 3.615
## 3rd Qu.: 5.510 3rd Qu.:13.00 3rd Qu.:26.00 3rd Qu.: 4.000
## Max. :21.630 Max. :18.00 Max. :50.00 Max. :34.000
## nonwhite female married numdep
## Min. :0.00000 Min. :1 Min. :0.0000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:1 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.00000 Median :1 Median :1.0000 Median :1.000
## Mean :0.09921 Mean :1 Mean :0.5238 Mean :1.087
## 3rd Qu.:0.00000 3rd Qu.:1 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :1.00000 Max. :1 Max. :1.0000 Max. :5.000
## smsa northcen south west
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.7302 Mean :0.2579 Mean :0.3333 Mean :0.1905
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## construc ndurman trcommpu trade
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.02778 Mean :0.08333 Mean :0.03968 Mean :0.2619
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## services profserv profocc clerocc
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1349 Mean :0.3571 Mean :0.2778 Mean :0.3056
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## servocc lwage expersq tenursq
## Min. :0.0000 Min. :-0.6349 Min. : 1.0 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.: 1.0986 1st Qu.: 25.0 1st Qu.: 0.00
## Median :0.0000 Median : 1.3218 Median : 169.0 Median : 4.00
## Mean :0.1984 Mean : 1.4164 Mean : 455.6 Mean : 41.66
## 3rd Qu.:0.0000 3rd Qu.: 1.7065 3rd Qu.: 676.0 3rd Qu.: 16.00
## Max. :1.0000 Max. : 3.0741 Max. :2500.0 Max. :1156.00
## female_married exper_tenure
## Min. :0.0000 Min. : 0.0
## 1st Qu.:0.0000 1st Qu.: 0.0
## Median :1.0000 Median : 12.5
## Mean :0.5238 Mean : 91.1
## 3rd Qu.:1.0000 3rd Qu.: 76.5
## Max. :1.0000 Max. :1200.0
summary(wage1,digits=2)
## wage educ exper tenure nonwhite
## Min. : 0.53 Min. : 0 Min. : 1 Min. : 0.0 Min. :0.0
## 1st Qu.: 3.33 1st Qu.:12 1st Qu.: 5 1st Qu.: 0.0 1st Qu.:0.0
## Median : 4.65 Median :12 Median :14 Median : 2.0 Median :0.0
## Mean : 5.90 Mean :13 Mean :17 Mean : 5.1 Mean :0.1
## 3rd Qu.: 6.88 3rd Qu.:14 3rd Qu.:26 3rd Qu.: 7.0 3rd Qu.:0.0
## Max. :24.98 Max. :18 Max. :51 Max. :44.0 Max. :1.0
## female married numdep smsa northcen
## Min. :0.00 Min. :0.00 Min. :0 Min. :0.00 Min. :0.00
## 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0 1st Qu.:0.00 1st Qu.:0.00
## Median :0.00 Median :1.00 Median :1 Median :1.00 Median :0.00
## Mean :0.48 Mean :0.61 Mean :1 Mean :0.72 Mean :0.25
## 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.:2 3rd Qu.:1.00 3rd Qu.:0.75
## Max. :1.00 Max. :1.00 Max. :6 Max. :1.00 Max. :1.00
## south west construc ndurman trcommpu
## Min. :0.00 Min. :0.00 Min. :0.000 Min. :0.00 Min. :0.000
## 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000
## Median :0.00 Median :0.00 Median :0.000 Median :0.00 Median :0.000
## Mean :0.36 Mean :0.17 Mean :0.046 Mean :0.11 Mean :0.044
## 3rd Qu.:1.00 3rd Qu.:0.00 3rd Qu.:0.000 3rd Qu.:0.00 3rd Qu.:0.000
## Max. :1.00 Max. :1.00 Max. :1.000 Max. :1.00 Max. :1.000
## trade services profserv profocc clerocc
## Min. :0.00 Min. :0.0 Min. :0.00 Min. :0.00 Min. :0.00
## 1st Qu.:0.00 1st Qu.:0.0 1st Qu.:0.00 1st Qu.:0.00 1st Qu.:0.00
## Median :0.00 Median :0.0 Median :0.00 Median :0.00 Median :0.00
## Mean :0.29 Mean :0.1 Mean :0.26 Mean :0.37 Mean :0.17
## 3rd Qu.:1.00 3rd Qu.:0.0 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.:0.00
## Max. :1.00 Max. :1.0 Max. :1.00 Max. :1.00 Max. :1.00
## servocc lwage expersq tenursq female_married
## Min. :0.00 Min. :-0.63 Min. : 1 Min. : 0 Min. :0.00
## 1st Qu.:0.00 1st Qu.: 1.20 1st Qu.: 25 1st Qu.: 0 1st Qu.:0.00
## Median :0.00 Median : 1.54 Median : 182 Median : 4 Median :0.00
## Mean :0.14 Mean : 1.62 Mean : 473 Mean : 78 Mean :0.25
## 3rd Qu.:0.00 3rd Qu.: 1.93 3rd Qu.: 676 3rd Qu.: 49 3rd Qu.:0.75
## Max. :1.00 Max. : 3.22 Max. :2601 Max. :1936 Max. :1.00
## exper_tenure
## Min. : 0
## 1st Qu.: 0
## Median : 27
## Mean : 136
## 3rd Qu.: 114
## Max. :2068
# library(pastecs)
stat.desc(wage1)
# Coefficient of variation
sd(wage1$wage)/mean(wage1$wage)
## [1] 0.6263605
# Mode
tab <- table(wage1$educ) # number of occurrences for each unique value
sort(tab, decreasing = TRUE) # sort highest to lowest
##
## 12 16 14 13 10 11 8 15 18 9 17 6 7 4 0 2 3 5
## 198 68 53 39 30 29 22 21 19 17 12 6 4 3 2 1 1 1
# or
sort(table(wage1$educ), decreasing = TRUE)
##
## 12 16 14 13 10 11 8 15 18 9 17 6 7 4 0 2 3 5
## 198 68 53 39 30 29 22 21 19 17 12 6 4 3 2 1 1 1
Start discussing the statistics and graphs.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 12.00 12.00 12.56 14.00 18.00
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
# vis_dat(wage1)
# vis_miss(nlswork) # ALTERNATIVE
# gg_miss_upset(wage1)
# Following the discussion here: http://www.sthda.com/english/wiki/descriptive-statistics-and-graphics
# Box plots
ggboxplot(wage1, y = "lwage", width = 0.5)
# Histogram
gghistogram(wage1, x = "lwage", bins = 9,
add = "mean")
## Warning: `geom_vline()`: Ignoring `mapping` because `xintercept` was provided.
## Warning: `geom_vline()`: Ignoring `data` because `xintercept` was provided.
ggplot(wage1) +
aes(x = educ) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Empirical cumulative distribution function (ECDF)
## ECDF is the fraction of data smaller than or equal to x
ggecdf(wage1, x = "lwage")
# Q-Q plots
## QQ plots is used to check whether the data is normally distributed
ggqqplot(wage1, x = "exper")
## Warning: The following aesthetics were dropped during statistical transformation: sample
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: The following aesthetics were dropped during statistical transformation: sample
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Using 'dplyr
group_by(wage1, smsa) %>%
summarise(
count = n(),
mean = mean(lwage, na.rm = TRUE),
sd = sd(lwage, na.rm = TRUE)
)
# # Box plot colored by groups: Species
ggboxplot(wage1, x = "smsa", y = "lwage",
color = "smsa",
palette = c("#00AFBB", "#E7B800", "#FC4E07"))
ggplot(wage1) +
aes(group = female, y = lwage) +
geom_boxplot()
# library(lattice)
dotplot(wage1$exper ~ wage1$tenure)
# Stripchart colored by groups: Species
ggstripchart(wage1, x = "female", y = "tenure",
color = "female",
palette = c("#00AFBB", "#E7B800", "#FC4E07"),
add = "mean_sd")
# Scatterplot
plot(wage1$exper, wage1$lwage)
ggplot(wage1) +
aes(x=exper,y=lwage) +
geom_point()
# http://www.sthda.com/english/wiki/ggplot2-scatter-plots-quick-start-guide-r-software-and-data-visualization
ggplot(wage1) +
aes(x=exper,y=lwage,color=as.factor(female)) +
geom_point() +
scale_color_hue()
ggplot(wage1) +
aes(x=exper,y=lwage,color=as.factor(female),shape=as.factor(female)) +
geom_point() +
geom_smooth(method=lm)
## `geom_smooth()` using formula = 'y ~ x'
# Line plot
plot(wage,type = "l")
# Density
plot(density(wage1$lwage))
# or
ggplot(wage1) +
aes(x=lwage) +
geom_density()
# Correlation plot
## EXPLORE: https://statsandr.com/blog/correlogram-in-r-how-to-highlight-the-most-correlated-variables-in-a-dataset/
freq(wage1$female)
## Frequencies
## wage1$female
## Label: =1 if female
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 274 52.09 52.09 52.09 52.09
## 1 252 47.91 100.00 47.91 100.00
## <NA> 0 0.00 100.00
## Total 526 100.00 100.00 100.00 100.00
# ctable(x=wage1$female,y=wage1$married)
summarytools::descr(wage1)
## Descriptive Statistics
## wage1
## N: 526
##
## clerocc construc educ exper exper_tenure expersq female
## ----------------- --------- ---------- -------- -------- -------------- --------- --------
## Mean 0.17 0.05 12.56 17.02 135.73 473.44 0.48
## Std.Dev 0.37 0.21 2.77 13.57 263.57 616.04 0.50
## Min 0.00 0.00 0.00 1.00 0.00 1.00 0.00
## Q1 0.00 0.00 12.00 5.00 0.00 25.00 0.00
## Median 0.00 0.00 12.00 13.50 27.00 182.50 0.00
## Q3 0.00 0.00 14.00 26.00 114.00 676.00 1.00
## Max 1.00 1.00 18.00 51.00 2068.00 2601.00 1.00
## MAD 0.00 0.00 1.48 14.08 40.03 264.64 0.00
## IQR 0.00 0.00 2.00 21.00 114.00 651.00 1.00
## CV 2.23 4.58 0.22 0.80 1.94 1.30 1.04
## Skewness 1.78 4.34 -0.62 0.70 3.09 1.49 0.08
## SE.Skewness 0.11 0.11 0.11 0.11 0.11 0.11 0.11
## Kurtosis 1.16 16.89 1.87 -0.65 11.65 1.28 -2.00
## N.Valid 526.00 526.00 526.00 526.00 526.00 526.00 526.00
## Pct.Valid 100.00 100.00 100.00 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## female_married lwage married ndurman nonwhite northcen numdep
## ----------------- ---------------- -------- --------- --------- ---------- ---------- --------
## Mean 0.25 1.62 0.61 0.11 0.10 0.25 1.04
## Std.Dev 0.43 0.53 0.49 0.32 0.30 0.43 1.26
## Min 0.00 -0.63 0.00 0.00 0.00 0.00 0.00
## Q1 0.00 1.20 0.00 0.00 0.00 0.00 0.00
## Median 0.00 1.54 1.00 0.00 0.00 0.00 1.00
## Q3 1.00 1.93 1.00 0.00 0.00 1.00 2.00
## Max 1.00 3.22 1.00 1.00 1.00 1.00 6.00
## MAD 0.00 0.53 0.00 0.00 0.00 0.00 1.48
## IQR 0.75 0.73 1.00 0.00 0.00 0.75 2.00
## CV 1.73 0.33 0.80 2.79 2.96 1.73 1.21
## Skewness 1.15 0.39 -0.44 2.42 2.61 1.15 1.16
## SE.Skewness 0.11 0.11 0.11 0.11 0.11 0.11 0.11
## Kurtosis -0.69 0.37 -1.81 3.87 4.83 -0.69 0.89
## N.Valid 526.00 526.00 526.00 526.00 526.00 526.00 526.00
## Pct.Valid 100.00 100.00 100.00 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## profocc profserv services servocc smsa south tenure tenursq
## ----------------- --------- ---------- ---------- --------- -------- -------- -------- ---------
## Mean 0.37 0.26 0.10 0.14 0.72 0.36 5.10 78.15
## Std.Dev 0.48 0.44 0.30 0.35 0.45 0.48 7.22 199.43
## Min 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Q1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## Median 0.00 0.00 0.00 0.00 1.00 0.00 2.00 4.00
## Q3 1.00 1.00 0.00 0.00 1.00 1.00 7.00 49.00
## Max 1.00 1.00 1.00 1.00 1.00 1.00 44.00 1936.00
## MAD 0.00 0.00 0.00 0.00 0.00 0.00 2.97 5.93
## IQR 1.00 1.00 0.00 0.00 1.00 1.00 7.00 49.00
## CV 1.31 1.70 2.99 2.47 0.62 1.35 1.42 2.55
## Skewness 0.55 1.10 2.65 2.06 -0.99 0.60 2.10 4.36
## SE.Skewness 0.11 0.11 0.11 0.11 0.11 0.11 0.11 0.11
## Kurtosis -1.70 -0.79 5.01 2.25 -1.02 -1.64 4.63 24.80
## N.Valid 526.00 526.00 526.00 526.00 526.00 526.00 526.00 526.00
## Pct.Valid 100.00 100.00 100.00 100.00 100.00 100.00 100.00 100.00
##
## Table: Table continues below
##
##
##
## trade trcommpu wage west
## ----------------- -------- ---------- -------- --------
## Mean 0.29 0.04 5.90 0.17
## Std.Dev 0.45 0.20 3.69 0.38
## Min 0.00 0.00 0.53 0.00
## Q1 0.00 0.00 3.33 0.00
## Median 0.00 0.00 4.65 0.00
## Q3 1.00 0.00 6.88 0.00
## Max 1.00 1.00 24.98 1.00
## MAD 0.00 0.00 2.37 0.00
## IQR 1.00 0.00 3.55 0.00
## CV 1.58 4.68 0.63 2.22
## Skewness 0.94 4.45 2.00 1.76
## SE.Skewness 0.11 0.11 0.11 0.11
## Kurtosis -1.12 17.84 4.94 1.10
## N.Valid 526.00 526.00 526.00 526.00
## Pct.Valid 100.00 100.00 100.00 100.00
dfSummary(wage1)
And now we report some frequency tables:
table(wage1$female)
tbl1 <- as.data.frame(table(wage1$nonwhite))
tbl1
# Visualize using bar plot
ggbarplot(tbl1, x = "Var1", y = "Freq")
barplot(table(wage1$educ))
barplot(prop.table(table(wage1$educ)))
# Two-way contingency table: Two categorical variables
tbl2 <- table(wage1$female, wage1$married)
tbl2
xtabs(~female + married,data=wage1)
tbl3 <- as.data.frame(tbl2)
colnames(tbl3) <- c('female','married','Freq')
ggbarplot(tbl3, x = "female", y = "Freq",
color = "married",
palette = c("brown", "blue", "gold", "green"))
# position dodge
ggbarplot(tbl3, x = "female", y = "Freq",
color = "married", position = position_dodge(),
palette = c("brown", "blue", "gold", "green"))
# library(ggplot2)
ggplot(wage1) +
aes(x = educ) +
geom_bar()
# Multiway tables: More than two categorical variables
xtabs(~female + married + nonwhite, data = wage1)
# Compute table margins and relative frequency
margin.table(wage1$female, margin = NULL)
# Margin of rows
margin.table(tbl2, 1)
# Margin of columns
margin.table(tbl2, 2)
# Frequencies relative to row total
prop.table(tbl2, 1)
# Table of percentages
round(prop.table(tbl2, 1), 2)*100
# To express the frequencies relative to the grand total, use this:
tbl2/sum(tbl2)
# Cross-tabulations with ctable()
summarytools::ctable( x = wage1$female, y = wage1$married)
# https://statsandr.com/blog/descriptive-statistics-in-r/
# Contingency table
xtabs(~ wage1$female + wage1$married)
## wage1$married
## wage1$female 0 1
## 0 86 188
## 1 120 132
# percentages by row:
round(prop.table(table(wage1$female, wage1$married), 1), 2) # round to 2 digits
##
## 0 1
## 0 0.31 0.69
## 1 0.48 0.52
# percentages by column:
round(prop.table(table(wage1$female, wage1$married), 2), 2) # round to 2 digits with round()
##
## 0 1
## 0 0.42 0.59
## 1 0.58 0.41
# Mosaic plot
# A mosaic plot allows to visualize a contingency table of two qualitative variables
mosaicplot(table(wage1$female, wage1$married),
color = TRUE,
xlab = "Female", # label for x-axis
ylab = "Married" # label for y-axis
)
w1 <- data.frame(wage1)
summary(w1)
## wage educ exper tenure
## Min. : 0.530 Min. : 0.00 Min. : 1.00 Min. : 0.000
## 1st Qu.: 3.330 1st Qu.:12.00 1st Qu.: 5.00 1st Qu.: 0.000
## Median : 4.650 Median :12.00 Median :13.50 Median : 2.000
## Mean : 5.896 Mean :12.56 Mean :17.02 Mean : 5.105
## 3rd Qu.: 6.880 3rd Qu.:14.00 3rd Qu.:26.00 3rd Qu.: 7.000
## Max. :24.980 Max. :18.00 Max. :51.00 Max. :44.000
## nonwhite female married numdep
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :1.000
## Mean :0.1027 Mean :0.4791 Mean :0.6084 Mean :1.044
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :6.000
## smsa northcen south west
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.7224 Mean :0.251 Mean :0.3555 Mean :0.1692
## 3rd Qu.:1.0000 3rd Qu.:0.750 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## construc ndurman trcommpu trade
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.04563 Mean :0.1141 Mean :0.04373 Mean :0.2871
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## services profserv profocc clerocc
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1008 Mean :0.2586 Mean :0.3669 Mean :0.1673
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## servocc lwage expersq tenursq
## Min. :0.0000 Min. :-0.6349 Min. : 1.0 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.: 1.2030 1st Qu.: 25.0 1st Qu.: 0.00
## Median :0.0000 Median : 1.5369 Median : 182.5 Median : 4.00
## Mean :0.1407 Mean : 1.6233 Mean : 473.4 Mean : 78.15
## 3rd Qu.:0.0000 3rd Qu.: 1.9286 3rd Qu.: 676.0 3rd Qu.: 49.00
## Max. :1.0000 Max. : 3.2181 Max. :2601.0 Max. :1936.00
## female_married exper_tenure
## Min. :0.000 Min. : 0.0
## 1st Qu.:0.000 1st Qu.: 0.0
## Median :0.000 Median : 27.0
## Mean :0.251 Mean : 135.7
## 3rd Qu.:0.750 3rd Qu.: 114.0
## Max. :1.000 Max. :2068.0
stargazer(w1,
title = "Summary statistics",
label = "tb:statistcis",
table.placement = "ht",
header=FALSE,type="text")
##
## Summary statistics
## =================================================
## Statistic N Mean St. Dev. Min Max
## -------------------------------------------------
## wage 526 5.896 3.693 0.530 24.980
## educ 526 12.563 2.769 0 18
## exper 526 17.017 13.572 1 51
## tenure 526 5.105 7.224 0 44
## nonwhite 526 0.103 0.304 0 1
## female 526 0.479 0.500 0 1
## married 526 0.608 0.489 0 1
## numdep 526 1.044 1.262 0 6
## smsa 526 0.722 0.448 0 1
## northcen 526 0.251 0.434 0 1
## south 526 0.356 0.479 0 1
## west 526 0.169 0.375 0 1
## construc 526 0.046 0.209 0 1
## ndurman 526 0.114 0.318 0 1
## trcommpu 526 0.044 0.205 0 1
## trade 526 0.287 0.453 0 1
## services 526 0.101 0.301 0 1
## profserv 526 0.259 0.438 0 1
## profocc 526 0.367 0.482 0 1
## clerocc 526 0.167 0.374 0 1
## servocc 526 0.141 0.348 0 1
## lwage 526 1.623 0.532 -0.635 3.218
## expersq 526 473.435 616.045 1 2,601
## tenursq 526 78.150 199.435 0 1,936
## female_married 526 0.251 0.434 0 1
## exper_tenure 526 135.728 263.573 0 2,068
## -------------------------------------------------
reg1 <- lm(data = wage1,lwage ~ educ + female)
summary(reg1)
##
## Call:
## lm(formula = lwage ~ educ + female, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02672 -0.27470 -0.03731 0.26219 1.34738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.826269 0.094054 8.785 <2e-16 ***
## educ 0.077203 0.007047 10.955 <2e-16 ***
## female -0.360865 0.039024 -9.247 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4455 on 523 degrees of freedom
## Multiple R-squared: 0.3002, Adjusted R-squared: 0.2975
## F-statistic: 112.2 on 2 and 523 DF, p-value: < 2.2e-16
reg2 <- lm(data = wage1,lwage ~ educ + female + married + nonwhite + exper + expersq)
summary(reg2)
##
## Call:
## lm(formula = lwage ~ educ + female + married + nonwhite + exper +
## expersq, data = wage1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.79493 -0.27927 -0.01887 0.26057 1.27372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3954849 0.1030918 3.836 0.00014 ***
## educ 0.0826774 0.0070297 11.761 < 2e-16 ***
## female -0.3293054 0.0367083 -8.971 < 2e-16 ***
## married 0.0638034 0.0422190 1.511 0.13133
## nonwhite -0.0147385 0.0597847 -0.247 0.80537
## exper 0.0358161 0.0052508 6.821 2.52e-11 ***
## expersq -0.0006334 0.0001131 -5.600 3.48e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4133 on 519 degrees of freedom
## Multiple R-squared: 0.4024, Adjusted R-squared: 0.3955
## F-statistic: 58.24 on 6 and 519 DF, p-value: < 2.2e-16
stargazer(reg1,reg2,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("OLS","Pooled"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Education (years)","Female","Married","Experience (years)",
"Experience sqrd."),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 3,
digits.extra = 5,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================
## OLS Pooled
## Education (years) 0.077*** 0.083***
## (0.007) (0.007)
## Female -0.361*** -0.329***
## (0.039) (0.037)
## Married 0.064
## (0.042)
## Experience (years) -0.015
## (0.060)
## Experience sqrd. 0.036***
## (0.005)
## expersq -0.001***
## (0.0001)
## N 526 526
## R2 0.300 0.402
## ===================================================
## Notes: Standard errors in parentheses.
# Heteroskedasticity
bptest(data = wage1, lwage ~ educ, studentize=F)
##
## Breusch-Pagan test
##
## data: lwage ~ educ
## BP = 0.95939, df = 1, p-value = 0.3273
# Multicoliniarity
car::vif(reg2)
## educ female married nonwhite exper expersq
## 1.164649 1.035618 1.307830 1.013994 15.610357 14.925842
Here I am reading a Stata data file (code omitted).
names(nlswork)
## [1] "idcode" "year" "birth_yr" "age" "race" "msp"
## [7] "nev_mar" "grade" "collgrad" "not_smsa" "c_city" "south"
## [13] "ind_code" "occ_code" "union" "wks_ue" "ttl_exp" "tenure"
## [19] "hours" "wks_work" "ln_wage"
head(nlswork)
# str(nlswork)
# dplyr::glimpse(nlswork)
dplyr::glimpse(nlswork$ln_wage)
## num [1:28534] 1.45 1.03 1.59 1.78 1.78 ...
## - attr(*, "label")= chr "ln(wage/GNP deflator)"
## - attr(*, "format.stata")= chr "%9.0g"
ExpData(nlswork,type=1)
ExpData(nlswork,type=2)
Start discussing the statistics and graphs.
## grade
## Min. : 0.00
## 1st Qu.:12.00
## Median :12.00
## Mean :12.53
## 3rd Qu.:14.00
## Max. :18.00
## NA's :2
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
## $`0`
vis_dat(nlswork)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the visdat package.
## Please report the issue at <]8;;https://github.com/ropensci/visdat/issueshttps://github.com/ropensci/visdat/issues]8;;>.
# vis_miss(nlswork) # ALTERNATIVE
gg_miss_upset(nlswork)
stargazer(nls,
title = "Summary statistics",
label = "tb:statistcis",
table.placement = "ht",
header=FALSE,type="text")
##
## Summary statistics
## =================================================
## Statistic N Mean St. Dev. Min Max
## -------------------------------------------------
## idcode 28,534 2,601.284 1,487.359 1 5,159
## year 28,534 77.959 6.384 68 88
## birth_yr 28,534 48.085 3.013 41 54
## age 28,510 29.045 6.701 14 46
## race 28,534 1.303 0.482 1 3
## msp 28,518 0.603 0.489 0 1
## nev_mar 28,518 0.230 0.421 0 1
## grade 28,532 12.533 2.324 0 18
## collgrad 28,534 0.168 0.374 0 1
## not_smsa 28,526 0.282 0.450 0 1
## c_city 28,526 0.357 0.479 0 1
## south 28,526 0.410 0.492 0 1
## ind_code 28,193 7.693 2.994 1 12
## occ_code 28,413 4.778 3.065 1 13
## union 19,238 0.234 0.424 0 1
## wks_ue 22,830 2.548 7.294 0 76
## ttl_exp 28,534 6.215 4.652 0.000 28.885
## tenure 28,101 3.124 3.751 0.000 25.917
## hours 28,467 36.560 9.870 1 168
## wks_work 27,831 53.989 29.032 0 104
## ln_wage 28,534 1.675 0.478 0.000 5.264
## -------------------------------------------------
plot(nlswork$grade,nlswork$ln_wage)
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Frequencies
## nlswork$year
## Label: interview year
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 88 2272 7.96 7.96 7.96 7.96
## 77 2171 7.61 15.57 7.61 15.57
## 87 2164 7.58 23.15 7.58 23.15
## 75 2141 7.50 30.66 7.50 30.66
## 82 2085 7.31 37.97 7.31 37.97
## 85 2085 7.31 45.27 7.31 45.27
## 83 1987 6.96 52.24 6.96 52.24
## 73 1981 6.94 59.18 6.94 59.18
## 78 1964 6.88 66.06 6.88 66.06
## 71 1851 6.49 72.55 6.49 72.55
## 80 1847 6.47 79.02 6.47 79.02
## 72 1693 5.93 84.95 5.93 84.95
## 70 1686 5.91 90.86 5.91 90.86
## 68 1375 4.82 95.68 4.82 95.68
## 69 1232 4.32 100.00 4.32 100.00
## <NA> 0 0.00 100.00
## Total 28534 100.00 100.00 100.00 100.00
## Frequencies
## nlswork$race
## Label: 1=white, 2=black, 3=other
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 1 20180 70.72 70.72 70.72 70.72
## 2 8051 28.22 98.94 28.22 98.94
## 3 303 1.06 100.00 1.06 100.00
## <NA> 0 0.00 100.00
## Total 28534 100.00 100.00 100.00 100.00
Estimating a Mincerian Wage Equation
POLS estimator with cluster-robust standard errors
##
## Regression analysis
## =================================================
## OLS panel
## linear
## OLS Pooled
## -------------------------------------------------
## Union 0.113*** 0.113***
## (0.007) (0.007)
## Collage Graduate 0.351*** 0.351***
## (0.007) (0.007)
## Age 0.022*** 0.022***
## (0.004) (0.004)
## Age sqrd. -0.0003*** -0.0003***
## (0.0001) (0.0001)
## Tenure 0.055*** 0.055***
## (0.002) (0.002)
## Tenure sqrd. -0.002*** -0.002***
## (0.0001) (0.0001)
## Not SMSA -0.205*** -0.205***
## (0.007) (0.007)
## South -0.141*** -0.141***
## (0.006) (0.006)
## City -0.032*** -0.032***
## (0.007) (0.007)
## N 19,007 19,007
## R2 0.319 0.319
## =================================================
## Notes: Standard errors in parentheses.
# ftable(c_city) # 1 if central city
pols_robust <- coeftest(pols, function(x) vcovHC(x, type = 'sss'))
stargazer(pols,pols_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled","Pooled (cluster)"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","Collage graduate","Age","Age sqrd.",
"Tenure","Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## =================================================
## panel coefficient
## linear test
## Pooled Pooled (cluster)
## -------------------------------------------------
## Union 0.112774*** 0.112774***
## (0.006774) (0.011731)
## Collage graduate 0.350946*** 0.350946***
## (0.007149) (0.014112)
## Age 0.022481*** 0.022481***
## (0.004249) (0.005373)
## Age sqrd. -0.000306*** -0.000306***
## (0.000068) (0.000088)
## Tenure 0.054787*** 0.054787***
## (0.001944) (0.002743)
## Tenure sqrd. -0.001540*** -0.001540***
## (0.000125) (0.000180)
## Not SMSA -0.205457*** -0.205457***
## (0.007120) (0.013137)
## South -0.140589*** -0.140589***
## (0.005850) (0.010964)
## City -0.031543*** -0.031543***
## (0.006683) (0.011691)
## N 19,007
## R2 0.319459
## =================================================
## Notes: Standard errors in parentheses.
for a balanced panel we have
nlswork_balanced <- read_dta("data/nlswork_balanced.dta")
re_balanced <- plm(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="random",
index=c("idcode", "year"))
summary(re_balanced)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = ln_wage ~ union + collgrad + age + agesq + tenure +
## tensq + not_smsa + south + c_city, data = nlswork_balanced,
## model = "random", index = c("idcode", "year"))
##
## Balanced Panel: n = 53, T = 12, N = 636
##
## Effects:
## var std.dev share
## idiosyncratic 0.03698 0.19230 0.319
## individual 0.07902 0.28110 0.681
## theta: 0.8063
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.904519 -0.104813 0.015673 0.114854 0.658580
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.05650211 0.20514289 5.1501 2.604e-07 ***
## union 0.06087668 0.02949229 2.0642 0.0390030 *
## collgrad 0.20128253 0.17651938 1.1403 0.2541673
## age 0.04592525 0.01334703 3.4409 0.0005799 ***
## agesq -0.00051693 0.00020986 -2.4632 0.0137701 *
## tenure 0.00332254 0.00616575 0.5389 0.5899766
## tensq 0.00011290 0.00028483 0.3964 0.6918193
## not_smsa -0.29590935 0.07567198 -3.9104 9.214e-05 ***
## south -0.06346941 0.06258349 -1.0142 0.3105084
## c_city 0.00068168 0.03609511 0.0189 0.9849322
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 32.65
## Residual Sum of Squares: 23.689
## R-Squared: 0.27447
## Adj. R-Squared: 0.26404
## Chisq: 236.819 on 9 DF, p-value: < 2.22e-16
re <- plm(data = nlswork_clean, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="random",
index=c("idcode", "year"))
summary(re)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = ln_wage ~ union + collgrad + age + agesq + tenure +
## tensq + not_smsa + south + c_city, data = nlswork_clean,
## model = "random", index = c("idcode", "year"))
##
## Unbalanced Panel: n = 4134, T = 1-12, N = 19007
##
## Effects:
## var std.dev share
## idiosyncratic 0.06476 0.25448 0.444
## individual 0.08108 0.28475 0.556
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3336 0.5920 0.6572 0.6406 0.6987 0.7502
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.77886 -0.13081 0.00854 0.00428 0.14314 3.03289
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.1369e+00 5.0729e-02 22.4103 < 2.2e-16 ***
## union 1.0368e-01 6.4468e-03 16.0819 < 2.2e-16 ***
## collgrad 3.6931e-01 1.2344e-02 29.9171 < 2.2e-16 ***
## age 2.3031e-02 3.3174e-03 6.9424 3.856e-12 ***
## agesq -2.4910e-04 5.3181e-05 -4.6839 2.814e-06 ***
## tenure 4.0771e-02 1.5826e-03 25.7618 < 2.2e-16 ***
## tensq -1.2466e-03 1.0022e-04 -12.4393 < 2.2e-16 ***
## not_smsa -1.5114e-01 9.3804e-03 -16.1119 < 2.2e-16 ***
## south -1.1157e-01 8.4671e-03 -13.1766 < 2.2e-16 ***
## c_city 3.9713e-04 7.4923e-03 0.0530 0.9577
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1942.3
## Residual Sum of Squares: 1268
## R-Squared: 0.34903
## Adj. R-Squared: 0.34872
## Chisq: 4820.91 on 9 DF, p-value: < 2.22e-16
re_robust <- coeftest(re, function(x) vcovHC(x, type = 'sss'))
stargazer(pols,pols_robust,re,re_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled","Pooled (cluster)","RE","RE (cluster"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","College Graduate","Age","Age sqrd.",
"Tenure","Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 3,
digits.extra = 5,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================================
## panel coefficient panel coefficient
## linear test linear test
## Pooled Pooled (cluster) RE RE (cluster
## -------------------------------------------------------------------
## Union 0.113*** 0.113*** 0.104*** 0.104***
## (0.007) (0.012) (0.006) (0.009)
## College Graduate 0.351*** 0.351*** 0.369*** 0.369***
## (0.007) (0.014) (0.012) (0.013)
## Age 0.022*** 0.022*** 0.023*** 0.023***
## (0.004) (0.005) (0.003) (0.005)
## Age sqrd. -0.0003*** -0.0003*** -0.0002*** -0.0002***
## (0.0001) (0.0001) (0.0001) (0.0001)
## Tenure 0.055*** 0.055*** 0.041*** 0.041***
## (0.002) (0.003) (0.002) (0.002)
## Tenure sqrd. -0.002*** -0.002*** -0.001*** -0.001***
## (0.0001) (0.0002) (0.0001) (0.0001)
## Not SMSA -0.205*** -0.205*** -0.151*** -0.151***
## (0.007) (0.013) (0.009) (0.012)
## South -0.141*** -0.141*** -0.112*** -0.112***
## (0.006) (0.011) (0.008) (0.011)
## City -0.032*** -0.032*** 0.0004 0.0004
## (0.007) (0.012) (0.007) (0.010)
## N 19,007 19,007
## R2 0.319 0.349
## ===================================================================
## Notes: Standard errors in parentheses.
stargazer(pols,pols_robust,re,re_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled","Pooled (cluster)","RE","RE (cluster"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","College Graduate","Age","Age sqrd.",
"Tenure","Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 3,
digits.extra = 5,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================================
## panel coefficient panel coefficient
## linear test linear test
## Pooled Pooled (cluster) RE RE (cluster
## -------------------------------------------------------------------
## Union 0.113*** 0.113*** 0.104*** 0.104***
## (0.007) (0.012) (0.006) (0.009)
## College Graduate 0.351*** 0.351*** 0.369*** 0.369***
## (0.007) (0.014) (0.012) (0.013)
## Age 0.022*** 0.022*** 0.023*** 0.023***
## (0.004) (0.005) (0.003) (0.005)
## Age sqrd. -0.0003*** -0.0003*** -0.0002*** -0.0002***
## (0.0001) (0.0001) (0.0001) (0.0001)
## Tenure 0.055*** 0.055*** 0.041*** 0.041***
## (0.002) (0.003) (0.002) (0.002)
## Tenure sqrd. -0.002*** -0.002*** -0.001*** -0.001***
## (0.0001) (0.0002) (0.0001) (0.0001)
## Not SMSA -0.205*** -0.205*** -0.151*** -0.151***
## (0.007) (0.013) (0.009) (0.012)
## South -0.141*** -0.141*** -0.112*** -0.112***
## (0.006) (0.011) (0.008) (0.011)
## City -0.032*** -0.032*** 0.0004 0.0004
## (0.007) (0.012) (0.007) (0.010)
## N 19,007 19,007
## R2 0.319 0.349
## ===================================================================
## Notes: Standard errors in parentheses.
plmtest(pols, type=c("bp"))
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + ...
## chisq = 14041, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
kable(tidy(plmtest(pols, type=c("bp"))), format = "simple",caption=
"LM test for the presence of unobserved effects")
| statistic | p.value | parameter | method | alternative |
|---|---|---|---|---|
| 14041.19 | 0 | 1 | Lagrange Multiplier Test - (Breusch-Pagan) | significant effects |
# //Final slide 32
#
# *Q3*
#
fe <- plm(data = nlswork_clean, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="within", index=c("idcode", "year"))
summary(fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = ln_wage ~ union + collgrad + age + agesq + tenure +
## tensq + not_smsa + south + c_city, data = nlswork_clean,
## model = "within", index = c("idcode", "year"))
##
## Unbalanced Panel: n = 4134, T = 1-12, N = 19007
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.88027 -0.10216 0.00000 0.10774 2.80710
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## union 9.3877e-02 6.9662e-03 13.4761 < 2.2e-16 ***
## age 2.4259e-02 3.4467e-03 7.0383 2.031e-12 ***
## agesq -2.2618e-04 5.5316e-05 -4.0890 4.356e-05 ***
## tenure 3.2966e-02 1.6465e-03 20.0218 < 2.2e-16 ***
## tensq -1.1002e-03 1.0291e-04 -10.6916 < 2.2e-16 ***
## not_smsa -9.3105e-02 1.2970e-02 -7.1787 7.372e-13 ***
## south -6.3222e-02 1.3279e-02 -4.7611 1.944e-06 ***
## c_city 1.1409e-02 8.8964e-03 1.2824 0.1997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1119.5
## Residual Sum of Squares: 962.69
## R-Squared: 0.14005
## Adj. R-Squared: -0.099505
## F-statistic: 302.62 on 8 and 14865 DF, p-value: < 2.22e-16
stargazer(pols,re,fe,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled","RE","FE"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","College","Age","Age sqrd.","Tenure",
"Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================
## Pooled RE FE
## Union 0.112774*** 0.103677*** 0.093877***
## (0.006774) (0.006447) (0.006966)
## College 0.350946*** 0.369309***
## (0.007149) (0.012344)
## Age 0.022481*** 0.023031*** 0.024259***
## (0.004249) (0.003317) (0.003447)
## Age sqrd. -0.000306*** -0.000249*** -0.000226***
## (0.000068) (0.000053) (0.000055)
## Tenure 0.054787*** 0.040771*** 0.032966***
## (0.001944) (0.001583) (0.001646)
## Tenure sqrd. -0.001540*** -0.001247*** -0.001100***
## (0.000125) (0.000100) (0.000103)
## Not SMSA -0.205457*** -0.151136*** -0.093105***
## (0.007120) (0.009380) (0.012970)
## South -0.140589*** -0.111567*** -0.063222***
## (0.005850) (0.008467) (0.013279)
## City -0.031543*** 0.000397 0.011409
## (0.006683) (0.007492) (0.008896)
## N 19,007 19,007 19,007
## R2 0.319459 0.349029 0.140054
## ===================================================
## Notes: Standard errors in parentheses.
# Testing for fixed effects, null: OLS better than fixed
# 'F test for individual effects' <<==>> 'F test that all u_i=0'
ols_0 <- lm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city)
summary(ols_0)
##
## Call:
## lm(formula = ln_wage ~ union + age + agesq + tenure + tensq +
## not_smsa + south + c_city, data = nlswork_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7497 -0.2508 -0.0182 0.2379 3.4100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.008e+00 6.821e-02 14.776 < 2e-16 ***
## union 1.294e-01 7.181e-03 18.026 < 2e-16 ***
## age 3.929e-02 4.495e-03 8.740 < 2e-16 ***
## agesq -5.387e-04 7.175e-05 -7.509 6.24e-14 ***
## tenure 5.806e-02 2.062e-03 28.158 < 2e-16 ***
## tensq -1.699e-03 1.331e-04 -12.765 < 2e-16 ***
## not_smsa -2.311e-01 7.538e-03 -30.657 < 2e-16 ***
## south -1.475e-01 6.208e-03 -23.762 < 2e-16 ***
## c_city -3.451e-02 7.094e-03 -4.864 1.16e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4091 on 18998 degrees of freedom
## Multiple R-squared: 0.2331, Adjusted R-squared: 0.2328
## F-statistic: 721.9 on 8 and 18998 DF, p-value: < 2.2e-16
pFtest(fe, ols_0)
##
## F test for individual effects
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + ...
## F = 8.2833, df1 = 4133, df2 = 14865, p-value < 2.2e-16
## alternative hypothesis: significant effects
# generate fixed-effects
# nlswork_clean$specific_effects <- fixef(fe)
# *Q3.1*
fe_robust <- coeftest(fe, function(x) vcovHC(x, type = 'sss'))
stargazer(ols_0,fe,fe_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("OLS","FE","FE (cluster)"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","Age","Age sqrd.","Tenure",
"Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================
## OLS panel coefficient
## linear test
## OLS FE FE (cluster)
## ---------------------------------------------------
## Union 0.129446*** 0.093877*** 0.093877***
## (0.007181) (0.006966) (0.009565)
## Age 0.039289*** 0.024259*** 0.024259***
## (0.004495) (0.003447) (0.005008)
## Age sqrd. -0.000539*** -0.000226*** -0.000226***
## (0.000072) (0.000055) (0.000081)
## Tenure 0.058063*** 0.032966*** 0.032966***
## (0.002062) (0.001646) (0.002085)
## Tenure sqrd. -0.001699*** -0.001100*** -0.001100***
## (0.000133) (0.000103) (0.000126)
## Not SMSA -0.231095*** -0.093105*** -0.093105***
## (0.007538) (0.012970) (0.019790)
## South -0.147518*** -0.063222*** -0.063222***
## (0.006208) (0.013279) (0.021653)
## City -0.034506*** 0.011409 0.011409
## (0.007094) (0.008896) (0.012605)
## N 19,007 19,007
## R2 0.233124 0.140054
## ===================================================
## Notes: Standard errors in parentheses.
# *Q3.2*
linearHypothesis(ols,c("age=0","agesq=0"))
linearHypothesis(ols,c("age=0","agesq=0"), white.adjust = "hc1")
Wald_test(fe, vcov = "CR1", cluster = nlswork_clean$idcode,
constraints = constrain_zero(c("age","agesq")), test = "Naive-F")
# *LSDV Estimator=FE estimator* <<==>> takes too long
# *using a smaller sample*
nlswork_balanced <- read_dta("data/nlswork_balanced_small.dta")
LSDV <- lm(data = nlswork_balanced, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city + factor(idcode))
summary(LSDV)
##
## Call:
## lm(formula = ln_wage ~ union + age + agesq + tenure + tensq +
## not_smsa + south + c_city + factor(idcode), data = nlswork_balanced)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.254362 -0.069457 0.004535 0.078344 0.239917
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5332323 0.4915522 1.085 0.2833
## union 0.0895838 0.0548743 1.633 0.1090
## age 0.0758967 0.0334355 2.270 0.0276 *
## agesq -0.0011959 0.0005405 -2.212 0.0316 *
## tenure 0.0125105 0.0162130 0.772 0.4440
## tensq 0.0006329 0.0007176 0.882 0.3821
## not_smsa NA NA NA NA
## south NA NA NA NA
## c_city 0.1164782 0.0875330 1.331 0.1895
## factor(idcode)20 0.3287869 0.1289784 2.549 0.0140 *
## factor(idcode)126 -0.0167263 0.0546551 -0.306 0.7609
## factor(idcode)128 0.0375653 0.0882907 0.425 0.6724
## factor(idcode)379 -0.1504303 0.1188913 -1.265 0.2118
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1149 on 49 degrees of freedom
## Multiple R-squared: 0.8021, Adjusted R-squared: 0.7617
## F-statistic: 19.86 on 10 and 49 DF, p-value: 5.49e-14
fe_LSDV <- plm(data = nlswork_balanced, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city, model="within", index=c("idcode", "year"))
summary(fe_LSDV)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = ln_wage ~ union + age + agesq + tenure + tensq +
## not_smsa + south + c_city, data = nlswork_balanced, model = "within",
## index = c("idcode", "year"))
##
## Balanced Panel: n = 5, T = 12, N = 60
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.2543619 -0.0694565 0.0045351 0.0783435 0.2399171
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## union 0.08958377 0.05487432 1.6325 0.10898
## age 0.07589667 0.03343547 2.2699 0.02765 *
## agesq -0.00119586 0.00054052 -2.2124 0.03163 *
## tenure 0.01251051 0.01621304 0.7716 0.44404
## tensq 0.00063295 0.00071759 0.8821 0.38206
## c_city 0.11647818 0.08753305 1.3307 0.18945
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2.3839
## Residual Sum of Squares: 0.64685
## R-Squared: 0.72866
## Adj. R-Squared: 0.67329
## F-statistic: 21.9312 on 6 and 49 DF, p-value: 2.4403e-12
stargazer(LSDV,fe_LSDV,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("LSDV","FE"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","Age","Age sqrd.","Tenure",
"Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ==================================================
## OLS panel
## linear
## LSDV FE
## --------------------------------------------------
## Union 0.089584 0.089584
## (0.054874) (0.054874)
## Age 0.075897** 0.075897**
## (0.033435) (0.033435)
## Age sqrd. -0.001196** -0.001196**
## (0.000541) (0.000541)
## Tenure 0.012511 0.012511
## (0.016213) (0.016213)
## Tenure sqrd. 0.000633 0.000633
## (0.000718) (0.000718)
## Not SMSA
##
## South
##
## City 0.116478 0.116478
## (0.087533) (0.087533)
## factor(idcode)20 0.328787**
## (0.128978)
## factor(idcode)126 -0.016726
## (0.054655)
## factor(idcode)128 0.037565
## (0.088291)
## factor(idcode)379 -0.150430
## (0.118891)
## N 60 60
## R2 0.802119 0.728663
## ==================================================
## Notes: Standard errors in parentheses.
# //Final slide 35
#
# *Q4*
fe_0 <- plm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city, model="within", index=c("idcode", "year"))
re_0 <- plm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city, model="random", index=c("idcode", "year"))
phtest(fe_0, re_0)
##
## Hausman Test
##
## data: ln_wage ~ union + age + agesq + tenure + tensq + not_smsa + south + ...
## chisq = 607.1, df = 8, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# //Final slide 46
#
# *Q5*
be <- plm(data = nlswork_clean, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="between",
index=c("idcode", "year"))
summary(be)
## Oneway (individual) effect Between Model
##
## Call:
## plm(formula = ln_wage ~ union + collgrad + age + agesq + tenure +
## tensq + not_smsa + south + c_city, data = nlswork_clean,
## model = "between", index = c("idcode", "year"))
##
## Unbalanced Panel: n = 4134, T = 1-12, N = 19007
## Observations used in estimation: 4134
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.5946586 -0.2004452 -0.0067495 0.1909616 1.8487578
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.19446539 0.15356828 7.7781 9.240e-15 ***
## union 0.11135322 0.01636297 6.8052 1.154e-11 ***
## collgrad 0.34850931 0.01308193 26.6405 < 2.2e-16 ***
## age 0.02113165 0.01022918 2.0658 0.03891 *
## agesq -0.00034316 0.00016368 -2.0965 0.03610 *
## tenure 0.09570307 0.00539210 17.7488 < 2.2e-16 ***
## tensq -0.00343998 0.00036430 -9.4426 < 2.2e-16 ***
## not_smsa -0.20536231 0.01440760 -14.2537 < 2.2e-16 ***
## south -0.13058886 0.01145804 -11.3971 < 2.2e-16 ***
## c_city -0.02215011 0.01385878 -1.5983 0.11006
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 773.5
## Residual Sum of Squares: 464.95
## R-Squared: 0.3989
## Adj. R-Squared: 0.39759
## F-statistic: 304.083 on 9 and 4124 DF, p-value: < 2.22e-16
# //Final slide 53
#
# *Q6*
fd <- plm(data = nlswork_clean, ln_wage ~ 0 + union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="fd",
index=c("idcode", "year"))
summary(fd)
## Oneway (individual) effect First-Difference Model
##
## Call:
## plm(formula = ln_wage ~ 0 + union + collgrad + age + agesq +
## tenure + tensq + not_smsa + south + c_city, data = nlswork_clean,
## model = "fd", index = c("idcode", "year"))
##
## Unbalanced Panel: n = 4134, T = 1-12, N = 19007
## Observations used in estimation: 14873
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.9266 -0.0925 0.0073 0.0156 0.1279 3.3217
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## union 6.9160e-02 6.6336e-03 10.4257 < 2.2e-16 ***
## age 2.2744e-02 5.5436e-03 4.1027 4.104e-05 ***
## agesq -2.5853e-04 9.0084e-05 -2.8698 0.004113 **
## tenure 3.2078e-02 2.1241e-03 15.1024 < 2.2e-16 ***
## tensq -1.2023e-03 1.7526e-04 -6.8600 7.160e-12 ***
## not_smsa -7.7277e-02 1.4369e-02 -5.3780 7.645e-08 ***
## south -4.6889e-02 1.5675e-02 -2.9913 0.002782 **
## c_city 2.2987e-02 9.9149e-03 2.3185 0.020437 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1441.5
## Residual Sum of Squares: 1404.4
## R-Squared: 0.02879
## Adj. R-Squared: 0.028332
## F-statistic: 85.5285 on 8 and 14865 DF, p-value: < 2.22e-16
stargazer(pols,re,fe,be,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("OLS","RE","FE","BE"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ============================================================
## OLS RE FE BE
## union 0.112774*** 0.103677*** 0.093877*** 0.111353***
## (0.006774) (0.006447) (0.006966) (0.016363)
## collgrad 0.350946*** 0.369309*** 0.348509***
## (0.007149) (0.012344) (0.013082)
## age 0.022481*** 0.023031*** 0.024259*** 0.021132**
## (0.004249) (0.003317) (0.003447) (0.010229)
## agesq -0.000306*** -0.000249*** -0.000226*** -0.000343**
## (0.000068) (0.000053) (0.000055) (0.000164)
## tenure 0.054787*** 0.040771*** 0.032966*** 0.095703***
## (0.001944) (0.001583) (0.001646) (0.005392)
## tensq -0.001540*** -0.001247*** -0.001100*** -0.003440***
## (0.000125) (0.000100) (0.000103) (0.000364)
## not_smsa -0.205457*** -0.151136*** -0.093105*** -0.205362***
## (0.007120) (0.009380) (0.012970) (0.014408)
## south -0.140589*** -0.111567*** -0.063222*** -0.130589***
## (0.005850) (0.008467) (0.013279) (0.011458)
## c_city -0.031543*** 0.000397 0.011409 -0.022150
## (0.006683) (0.007492) (0.008896) (0.013859)
## N 19,007 19,007 19,007 4,134
## R2 0.319459 0.349029 0.140054 0.398899
## ============================================================
## Notes: Standard errors in parentheses.
# H0) The null hypothesis for the Breusch-Pagan test is homoskedasticity
# takes too long to compute
# bptest(data = nlswork_clean, ln_wage ~ union +
# collgrad +age +agesq +tenure +tensq +
# not_smsa +south +c_city + factor(idcode), studentize=F)
bptest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city + factor(idcode), studentize=F)
##
## Breusch-Pagan test
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + south + c_city + factor(idcode)
## BP = 5.2368, df = 10, p-value = 0.8748
pwtest(data = nlswork_clean, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city)
pbsytest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, test="j")
##
## Baltagi and Li AR-RE joint test
##
## data: formula
## chisq = 50.736, df = 2, p-value = 9.612e-12
## alternative hypothesis: AR(1) errors or random effects
pbgtest(fe, order = 2)
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel models
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + ...
## chisq = 181.23, df = 2, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
pwartest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city)
##
## Wooldridge's test for serial correlation in FE panels
##
## data: plm.model
## F = 12.205, df1 = 1, df2 = 53, p-value = 0.0009707
## alternative hypothesis: serial correlation
pwfdtest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city)
##
## Wooldridge's first-difference test for serial correlation in panels
##
## data: plm.model
## F = 8.5778, df1 = 1, df2 = 48, p-value = 0.005192
## alternative hypothesis: serial correlation in differenced errors
pwfdtest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, h0="fe")
##
## Wooldridge's first-difference test for serial correlation in panels
##
## data: plm.model
## F = 2.9964, df1 = 1, df2 = 48, p-value = 0.08988
## alternative hypothesis: serial correlation in original errors
pcdtest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city)
##
## Pesaran CD test for cross-sectional dependence in panels
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + south + c_city
## z = 1.8975, p-value = 0.05776
## alternative hypothesis: cross-sectional dependence
## CHECK: 'lfe' and 'fixest'
### https://github.com/sgaure/lfe
### https://github.com/lrberge/fixest
# *including 1 fixed effect*
HDFE1a <- feols(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city | idcode)
summary(HDFE1a)
## OLS estimation, Dep. Var.: ln_wage
## Observations: 19,007
## Fixed-effects: idcode: 4,134
## Standard-errors: Clustered (idcode)
## Estimate Std. Error t value Pr(>|t|)
## union 0.093877 0.009566 9.814146 < 2.2e-16 ***
## age 0.024259 0.005008 4.843618 1.3216e-06 ***
## agesq -0.000226 0.000081 -2.778359 5.4881e-03 **
## tenure 0.032966 0.002085 15.807147 < 2.2e-16 ***
## tensq -0.001100 0.000126 -8.719205 < 2.2e-16 ***
## not_smsa -0.093105 0.019791 -4.704515 2.6279e-06 ***
## south -0.063222 0.021654 -2.919622 3.5235e-03 **
## c_city 0.011409 0.012606 0.905058 3.6549e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.225053 Adj. R2: 0.703151
## Within R2: 0.140054
HDFE1b <- felm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city | idcode, clustervar=c("idcode"))
## Warning: Argument(s) clustervar are deprecated and will be removed, use
## multipart formula instead
summary(HDFE1b)
##
## Call:
## felm(formula = ln_wage ~ union + age + agesq + tenure + tensq + not_smsa + south + c_city | idcode, data = nlswork_clean, clustervar = c("idcode"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8803 -0.1022 0.0000 0.1077 2.8071
##
## Coefficients:
## Estimate Cluster s.e. t value Pr(>|t|)
## union 9.388e-02 9.565e-03 9.814 < 2e-16 ***
## age 2.426e-02 5.008e-03 4.844 1.32e-06 ***
## agesq -2.262e-04 8.141e-05 -2.778 0.00549 **
## tenure 3.297e-02 2.086e-03 15.807 < 2e-16 ***
## tensq -1.100e-03 1.262e-04 -8.719 < 2e-16 ***
## not_smsa -9.310e-02 1.979e-02 -4.705 2.63e-06 ***
## south -6.322e-02 2.165e-02 -2.920 0.00352 **
## c_city 1.141e-02 1.261e-02 0.905 0.36549
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2545 on 14865 degrees of freedom
## Multiple R-squared(full model): 0.7678 Adjusted R-squared: 0.7032
## Multiple R-squared(proj model): 0.1401 Adjusted R-squared: -0.0995
## F-statistic(full model, *iid*):11.87 on 4141 and 14865 DF, p-value: < 2.2e-16
## F-statistic(proj model): 168 on 8 and 4133 DF, p-value: < 2.2e-16
# *including a 2nd fixed effect*
HDFE2a <- feols(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city | idcode + year)
summary(HDFE2a)
## OLS estimation, Dep. Var.: ln_wage
## Observations: 19,007
## Fixed-effects: idcode: 4,134, year: 12
## Standard-errors: Clustered (idcode)
## Estimate Std. Error t value Pr(>|t|)
## union 0.095700 0.009523 10.048910 < 2.2e-16 ***
## age 0.073440 0.013588 5.404711 6.8573e-08 ***
## agesq -0.000720 0.000116 -6.218794 5.5065e-10 ***
## tenure 0.032423 0.002104 15.408152 < 2.2e-16 ***
## tensq -0.001090 0.000129 -8.443532 < 2.2e-16 ***
## not_smsa -0.090537 0.019619 -4.614644 4.0571e-06 ***
## south -0.064281 0.021622 -2.972941 2.9666e-03 **
## c_city 0.010432 0.012668 0.823497 4.1027e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.223942 Adj. R2: 0.705857
## Within R2: 0.066421
HDFE2b <- felm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city | idcode + year, clustervar=c("idcode"))
## Warning: Argument(s) clustervar are deprecated and will be removed, use
## multipart formula instead
summary(HDFE2b)
##
## Call:
## felm(formula = ln_wage ~ union + age + agesq + tenure + tensq + not_smsa + south + c_city | idcode + year, data = nlswork_clean, clustervar = c("idcode"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.90155 -0.09933 0.00000 0.10738 2.78536
##
## Coefficients:
## Estimate Cluster s.e. t value Pr(>|t|)
## union 0.0956999 0.0095207 10.052 < 2e-16 ***
## age 0.0734400 0.0135842 5.406 6.80e-08 ***
## agesq -0.0007205 0.0001158 -6.221 5.44e-10 ***
## tenure 0.0324225 0.0021036 15.413 < 2e-16 ***
## tensq -0.0010902 0.0001291 -8.446 < 2e-16 ***
## not_smsa -0.0905368 0.0196138 -4.616 4.03e-06 ***
## south -0.0642811 0.0216158 -2.974 0.00296 **
## c_city 0.0104319 0.0126641 0.824 0.41014
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2533 on 14854 degrees of freedom
## Multiple R-squared(full model): 0.7701 Adjusted R-squared: 0.7059
## Multiple R-squared(proj model): 0.06642 Adjusted R-squared: -0.1945
## F-statistic(full model, *iid*):11.98 on 4152 and 14854 DF, p-value: < 2.2e-16
## F-statistic(proj model): 62.59 on 8 and 4133 DF, p-value: < 2.2e-16
See the Stata file ‘stata_do_example.do’ that produces the data in folder tmp_files.
simulated <- read_dta("data/data_simulation.dta")
# names(nlswork)
# head(nlswork)
# str(nlswork)
# eda_report(simulated,output_dir = "EDA/",output_file = "eda_simulated.pdf")
ExpData(simulated,type=1)
ExpData(simulated,type=2)
summary(simulated)
## workerid year ui quarter
## Min. : 1 Min. : 1.0 Min. :0.0004503 Min. :1.000
## 1st Qu.:124 1st Qu.: 3.0 1st Qu.:0.2438383 1st Qu.:2.000
## Median :248 Median : 5.5 Median :0.4766747 Median :2.000
## Mean :248 Mean : 5.5 Mean :0.4879205 Mean :2.517
## 3rd Qu.:372 3rd Qu.: 8.0 3rd Qu.:0.7447072 3rd Qu.:4.000
## Max. :495 Max. :10.0 Max. :0.9957856 Max. :4.000
##
## q1 wage educ exper
## Min. :0.0000 Min. : 662.1 Min. : 0.000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:1226.8 1st Qu.: 2.322 1st Qu.: 9.00
## Median :0.0000 Median :1718.1 Median : 4.646 Median :15.00
## Mean :0.2404 Mean :1998.8 Mean : 5.226 Mean :14.34
## 3rd Qu.:0.0000 3rd Qu.:2482.9 3rd Qu.: 7.635 3rd Qu.:19.00
## Max. :1.0000 Max. :7170.2 Max. :19.446 Max. :28.00
##
## union exper2 lnwage yy1 yy2
## Min. :0.0000 Min. : 0.0 Min. :6.495 Min. :0.0 Min. :0.0
## 1st Qu.:0.0000 1st Qu.: 81.0 1st Qu.:7.112 1st Qu.:0.0 1st Qu.:0.0
## Median :0.0000 Median :225.0 Median :7.449 Median :0.0 Median :0.0
## Mean :0.4863 Mean :247.3 Mean :7.483 Mean :0.1 Mean :0.1
## 3rd Qu.:1.0000 3rd Qu.:361.0 3rd Qu.:7.817 3rd Qu.:0.0 3rd Qu.:0.0
## Max. :1.0000 Max. :784.0 Max. :8.878 Max. :1.0 Max. :1.0
##
## yy3 yy4 yy5 yy6 yy7
## Min. :0.0 Min. :0.0 Min. :0.0 Min. :0.0 Min. :0.0
## 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0
## Median :0.0 Median :0.0 Median :0.0 Median :0.0 Median :0.0
## Mean :0.1 Mean :0.1 Mean :0.1 Mean :0.1 Mean :0.1
## 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0
## Max. :1.0 Max. :1.0 Max. :1.0 Max. :1.0 Max. :1.0
##
## yy8 yy9 yy10 lag_lnwage
## Min. :0.0 Min. :0.0 Min. :0.0 Min. :6.495
## 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:7.085
## Median :0.0 Median :0.0 Median :0.0 Median :7.425
## Mean :0.1 Mean :0.1 Mean :0.1 Mean :7.455
## 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:7.784
## Max. :1.0 Max. :1.0 Max. :1.0 Max. :8.724
## NA's :495
ftable(simulated$year)
## 1 2 3 4 5 6 7 8 9 10
##
## 495 495 495 495 495 495 495 495 495 495
ExpCTable(simulated)
ExpCatViz(simulated)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
ExpNumStat(simulated,by="A",Outlier = TRUE,round=2,Qnt=c(0.1,0.25,0.50,0.99))
ExpNumViz(simulated,Page=c(6,2))
## $`0`
ExpOutliers(simulated,varlist=c("lnwage"))
## $outlier_summary
## Category lnwage
## 1 Lower cap : 0.05 6.75808242272123
## 2 Upper cap : 0.95 8.32169651894011
## 3 Lower bound 6.05
## 4 Upper bound 8.87
## 5 Num of outliers 1
## 6 Lower outlier case
## 7 Upper outlier case 210
## 8 Mean before 7.48
## 9 Mean after 7.48
## 10 Median before 7.44898780138622
## 11 Median after 7.44896911785069
##
## $outlier_data
## workerid year ui quarter q1 wage educ exper union exper2
## 1: 1 1 0.5734584 2 0 1913.481 6.3080420 17 1 289
## 2: 1 2 0.5734584 2 0 2065.687 6.3080420 18 1 324
## 3: 1 3 0.5734584 2 0 2015.642 7.3080420 19 1 361
## 4: 1 4 0.5734584 2 0 2274.630 8.3080425 20 1 400
## 5: 1 5 0.5734584 2 0 2576.639 9.3080425 21 1 441
## ---
## 4946: 495 6 0.3515452 1 1 1257.155 0.0000000 17 1 289
## 4947: 495 7 0.3515452 1 1 1170.614 0.3515451 18 1 324
## 4948: 495 8 0.3515452 1 1 1211.788 0.3515451 19 1 361
## 4949: 495 9 0.3515452 1 1 1346.442 0.3515451 20 1 400
## 4950: 495 10 0.3515452 1 1 1279.959 0.3515451 21 0 441
## lnwage yy1 yy2 yy3 yy4 yy5 yy6 yy7 yy8 yy9 yy10 lag_lnwage
## 1: 7.556680 1 0 0 0 0 0 0 0 0 0 NA
## 2: 7.633218 0 1 0 0 0 0 0 0 0 0 7.556680
## 3: 7.608693 0 0 1 0 0 0 0 0 0 0 7.633218
## 4: 7.729573 0 0 0 1 0 0 0 0 0 0 7.608693
## 5: 7.854241 0 0 0 0 1 0 0 0 0 0 7.729573
## ---
## 4946: 7.136607 0 0 0 0 0 1 0 0 0 0 6.973784
## 4947: 7.065284 0 0 0 0 0 0 1 0 0 0 7.136607
## 4948: 7.099852 0 0 0 0 0 0 0 1 0 0 7.065284
## 4949: 7.205221 0 0 0 0 0 0 0 0 1 0 7.099852
## 4950: 7.154584 0 0 0 0 0 0 0 0 0 1 7.205221
## out_cap_lnwage
## 1: 7.556680
## 2: 7.633218
## 3: 7.608693
## 4: 7.729573
## 5: 7.854241
## ---
## 4946: 7.136607
## 4947: 7.065284
## 4948: 7.099852
## 4949: 7.205221
## 4950: 7.154584
##
## $outlier_index
## $outlier_index$upper_out_index
## $outlier_index$upper_out_index[[1]]
## [1] 210
##
##
## $outlier_index$lower_out_index
## $outlier_index$lower_out_index[[1]]
## numeric(0)
vis_dat(simulated)
# gg_miss_upset(simulated)
stargazer(simulated,
title = "Summary statistics",
label = "tb:statistcis",
table.placement = "ht",
header=FALSE,type="text")
##
## Summary statistics
## ======================================================
## Statistic N Mean St. Dev. Min Max
## ------------------------------------------------------
## workerid 4,950 248.000 142.908 1 495
## year 4,950 5.500 2.873 1 10
## ui 4,950 0.488 0.289 0.0005 0.996
## quarter 4,950 2.517 1.128 1 4
## q1 4,950 0.240 0.427 0 1
## wage 4,950 1,998.779 1,032.194 662.083 7,170.242
## educ 4,950 5.226 3.795 0.000 19.446
## exper 4,950 14.336 6.460 0 28
## union 4,950 0.486 0.500 0 1
## exper2 4,950 247.252 187.126 0 784
## lnwage 4,950 7.483 0.476 6.495 8.878
## yy1 4,950 0.100 0.300 0 1
## yy2 4,950 0.100 0.300 0 1
## yy3 4,950 0.100 0.300 0 1
## yy4 4,950 0.100 0.300 0 1
## yy5 4,950 0.100 0.300 0 1
## yy6 4,950 0.100 0.300 0 1
## yy7 4,950 0.100 0.300 0 1
## yy8 4,950 0.100 0.300 0 1
## yy9 4,950 0.100 0.300 0 1
## yy10 4,950 0.100 0.300 0 1
## lag_lnwage 4,455 7.455 0.470 6.495 8.724
## ------------------------------------------------------
## DASHBOARD
####### ExPanD()
OBSERVE THE MISTAKE FOLLOWING THE INTRODUCTION OF TIME DUMMIES AND EXPERIENCE IN THE FIXED-EFFECTS MODEL
pols <- plm(data = simulated, lnwage ~ educ + exper +
exper2 + factor(year),
model="pooling", index=c("workerid", "year"))
re <- plm(data = simulated, lnwage ~ educ + exper +
exper2 + factor(year),
model="random", index=c("workerid", "year"))
plmtest(pols, type=c("bp"))
##
## Lagrange Multiplier Test - (Breusch-Pagan)
##
## data: lnwage ~ educ + exper + exper2 + factor(year)
## chisq = 19885, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
fe <- plm(data = simulated, lnwage ~ educ + exper +
exper2 + factor(year),
model="within", index=c("workerid", "year"))
pFtest(fe, pols)
##
## F test for individual effects
##
## data: lnwage ~ educ + exper + exper2 + factor(year)
## F = 270.97, df1 = 493, df2 = 4444, p-value < 2.2e-16
## alternative hypothesis: significant effects
phtest(fe, re)
##
## Hausman Test
##
## data: lnwage ~ educ + exper + exper2 + factor(year)
## chisq = 309.92, df = 11, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
pols_robust <- coeftest(pols, function(x) vcovHC(x, type = 'sss'))
re_robust <- coeftest(re, function(x) vcovHC(x, type = 'sss'))
fe_robust <- coeftest(fe, function(x) vcovHC(x, type = 'sss'))
stargazer(pols_robust,re_robust, fe_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled (cluster)","RE (cluster)","FE (cluster"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 3,
digits.extra = 5,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ========================================================
## Pooled (cluster) RE (cluster) FE (cluster
## educ 0.107*** 0.067*** 0.062***
## (0.002) (0.001) (0.001)
## exper 0.013*** 0.005** 0.025***
## (0.005) (0.002) (0.001)
## exper2 -0.0003* 0.000004 0.000004
## (0.0002) (0.00002) (0.00002)
## factor(year)2 -0.001 0.019*** 0.0002
## (0.004) (0.004) (0.003)
## factor(year)3 -0.002 0.038*** 0.001
## (0.005) (0.005) (0.003)
## factor(year)4 -0.008 0.052*** -0.004*
## (0.007) (0.007) (0.003)
## factor(year)5 -0.004 0.075*** -0.0002
## (0.009) (0.009) (0.002)
## factor(year)6 -0.005 0.095*** 0.001
## (0.010) (0.011) (0.003)
## factor(year)7 -0.009 0.112*** -0.001
## (0.012) (0.013) (0.003)
## factor(year)8 -0.011 0.131*** 0.00004
## (0.014) (0.015) (0.003)
## factor(year)9 -0.013 0.148*** -0.002
## (0.016) (0.017) (0.003)
## factor(year)10 -0.011 0.169***
## (0.018) (0.019)
## ========================================================
## Notes: Standard errors in parentheses.
end_time <- Sys.time()
end_time - start_time
## Time difference of 26.96878 secs
# sprintf(end_time - start_time,fmt = '%#.1f')
#